***************
* This file analyses the data of all job ads published on Job-Room
* Author: Daniel Kopp
***************

clear
clear matrix
clear mata

set maxvar 8000
set seed 1

import delimited using "data_processed\job_ads_from_api.csv"

gen stellenbezeichnung = job_title

drop if stellenbezeichnung== "" // 636 obs deleted
compress ad_hash

* Drop duplicates
cap drop dup
duplicates tag ad_hash, gen(dup)
tab dup 	// 0.41%

bys ad_hash: gen n_dup = _n
tab n_dup
drop if n_dup==2		// 2,397 (0.21%)
drop n_dup /*dup*/

* Format von Datumvariablen
foreach i in  created publication_start_date publication_end_date    {
	    g `i'_tmp = date(`i', "YMD")
		format `i'_tmp %tdCCYY-NN-DD
		drop  `i'
		rename  `i'_tmp `i'
}

gen diff_created_pub = publication_start_date-created
sum diff_created_pub,d
	
drop if created==.  // 231 obs deleted

local date_var "created"		
count if `date_var'<td(31dec2018)							// 0   
count if inrange(`date_var',td(31dec2018),td(01jul2019))	// 0 
count if inrange(`date_var',td(01jul2019),td(31dec2019))	// 477  - all created are from December 31
count if inrange(`date_var',td(31dec2019),td(01jul2020))	 
count if inrange(`date_var',td(01jul2020),td(31dec2020))	
count if inrange(`date_var',td(31dec2020),td(01jun2021))	
count if inrange(`date_var',td(01jun2021),td(31dec2021))	// 0 
count if inrange(`date_var',td(31dec2021),td(01jul2022))	// 0 

gen month_created = month(created)
gen year_created = year(created)

tab month_created if year_created==2020
tab month_created if year_created==2021

* -> sample consists of ads created between 31dec2019 und 01jun2021

* How many of the ads are from other platforms and how many are reported directly to Job-Room (via API, Job-Room platform, or Regional employment agency)  
tab source, m		// 72% are from other platforms

* Gen canton of a workplace
destring postcode , gen(plz) force

merge m:1 plz using "Misc_files\plz_kanton.dta"
drop if _merge==2
drop _merge

gen cod_gemeinde = communalcode

merge m:1 cod_gemeinde using "Misc_files\key_gemeinde_kanton.dta"
drop if _merge==2
drop _merge 

gen 	kanton = "ZH" if txt_kanton=="Zürich"
replace kanton = "BE" if txt_kanton=="Bern / Berne"
replace kanton = "LU" if txt_kanton=="Luzern"
replace kanton = "UR" if txt_kanton=="Uri"
replace kanton = "SZ" if txt_kanton=="Schwyz"
replace kanton = "ZG" if txt_kanton=="Zug"
replace kanton = "FR" if txt_kanton=="Fribourg / Freiburg"
replace kanton = "BS" if txt_kanton=="Basel-Stadt"
replace kanton = "SH" if txt_kanton=="Schaffhausen"
replace kanton = "BL" if txt_kanton=="Basel-Landschaft"
replace kanton = "SG" if txt_kanton=="St. Gallen"
replace kanton = "SO" if txt_kanton=="Solothurn"
replace kanton = "AI" if txt_kanton=="Appenzell Innerrhoden"
replace kanton = "AR" if txt_kanton=="Appenzell Ausserrhoden"
replace kanton = "GR" if txt_kanton=="Graubünden / Grigioni / Grischun"
replace kanton = "AG" if txt_kanton=="Aargau"
replace kanton = "TG" if txt_kanton=="Thurgau"
replace kanton = "TI" if txt_kanton=="Ticino"
replace kanton = "VD" if txt_kanton=="Vaud"
replace kanton = "VS" if txt_kanton=="Valais / Wallis"
replace kanton = "NE" if txt_kanton=="Neuchâtel"
replace kanton = "GE" if txt_kanton=="Genève"
replace kanton = "JU" if txt_kanton=="Jura"
replace kanton = "NW" if txt_kanton=="Nidwalden"
replace kanton = "OW" if txt_kanton=="Obwalden"
replace kanton = "GL" if txt_kanton=="Glarus"

replace kanton = ktkz if kanton=="" & dup==0

tab city if kanton =="" 

replace kanton = "TG" if city=="Thurgau" | city=="Region Thurgau" | city=="Region Oberthurgau" | city=="Thurgau / Bodensee" 
replace kanton = "ZH" if city=="Unterstammheim" 
replace kanton = "GL" if city=="Glarnerland" 
replace kanton = "ZH" if city=="Limmattal"  // parts of the Limmattal are also in AG but postcode 8953 is Dietikon (ZH)
replace kanton = "ZH" if city=="Stadt Zürich / Zürichsee"  
replace kanton = "AG" if city=="Aargau"
replace kanton = "BS" if city=="Basel Stadt"
replace kanton = "BS" if city=="Basel"
replace kanton = "BS" if city=="Bâle"
replace kanton = "VD" if city=="Canton de Vaud"
replace kanton = "GR" if city=="Graubünden"
replace kanton = "GR" if city=="Kanton Graubünden"
replace kanton = "LU" if city=="Lucerne" 
replace kanton = "AG" if city=="Region Aargau"
replace kanton = "TI" if city=="Tessin" 
replace kanton = "TI" if city=="Ticino" 
replace kanton = "ZH" if city=="Zürcher Oberland"
replace kanton = "ZH" if city=="zürich"

tab kanton, m		// 6.1% could not be classified, some of them are from Liechenstein, sometimes it only says "Switzerland" without specification of the exact location

****
* Add isco occupations to the job ads:
****

preserve
import delimited using "data_processed\job_ads_from_api_occupation.csv", clear

duplicates drop ad_hash isco, force

sort ad_hash isco
gen double shuffle = runiform()		// necessary to replicate the results because we have sometimes similar iscos per ad_hash
bys ad_hash (shuffle): gen n_isco = _n
tab n_isco,m		// 1153037 96% are unique 
drop shuffle

rename isco isco_
	
drop cod_avam 
	
reshape wide isco_ , i(ad_hash) j(n_isco)
tempfile api_ad_occup_wide
save `api_ad_occup_wide'
restore

merge m:1 ad_hash using `api_ad_occup_wide'		 
drop if _merge==2
drop _merge

replace isco_1=isco_2 if isco_1==.
replace isco_1=isco_3 if isco_1==.

count if isco_2!=. & isco_2!=isco_1  // 36,992 (3.2%)

* In the following, we will only use the first isco code (note that for more than 96% of the job ads we have only one code)
gen isco4 = isco_1
drop isco_1 isco_2 isco_3

* Gen Isco 3 digit
tostring  isco4, gen(isco4_str)
gen 	isco3_str 	= substr(isco4_str,1,strlen(isco4_str)-1)
destring isco3_str	, gen(isco3)
drop isco4_str isco3_str

* Gen Isco 2 digit
tostring  isco3, gen(isco3_str)
gen 	isco2_str 	= substr(isco3_str,1,strlen(isco3_str)-1)
destring isco2_str	, gen(isco2)
drop isco2_str isco3_str

* Gen Isco 1 digit
tostring  isco2, gen(isco2_str)
gen 	isco1_str 	= substr(isco2_str,1,strlen(isco2_str)-1)
destring isco1_str	, gen(isco1)
drop isco1_str isco2_str

forvalues i = 1/4 {
cap run "Help_files\label_isco08_`i'.do"
label values isco`i' isco08_`i'_lab
}

******************************
* Descriptive stats
******************************

* We drop the few obs where workload is missing
drop if workload_min==.		// 235

* The sample period covers ads created between 31dec2019 und 01jun2021
count		//   1,152,571

distinct company_name  // 63'529 distinct company names

* Gen pensum_cat
gen 	pensum_cat = 0 if workload_min==100		// only full-time
replace pensum_cat = 1 if workload_min<100 & workload_max==100 // full-time or part-time
replace pensum_cat = 2 if workload_max<100 // only part-time

label define pensum 0 "Full-time job" 1 "Full- or part-time job" 2 "Part-time job"
label values pensum_cat pensum

tab pensum_cat,m			// 78.6% post full-time jobs, 11.2% cover both full- and part-time jobs, and 10.2% post only part-time jobs

* Save the share of job ads by isco 1
preserve
drop if isco1==. 
gen total_obs_api = _N
collapse total_obs_api (count) obs_isco1_api=workload_min, by(isco1)
	gen share_obs_isco1_api = obs_isco1_api/total_obs_api
tabstat share_obs_isco1_api , by(isco1) notot
rename isco1 isco_1
save "Misc_files\job_ads_by_isco1_api.dta", replace
restore


* We merge the number of employees per ISCO 4-digit occupation from the Structural Survey ("Strukturerhebung") to our dataset. 
* This file contains all Isco 4-digit occupations (498, respectively 495 if we disregard the 3-digit codes).  
gen s_isco_08_4 = isco4
merge m:1 s_isco_08_4 using "Misc_files\employment_by_isco4.dta", keepusing(employed_isco4)
drop if _merge==2
drop _merge

	
***********
* Descriptive statistics on full-/part-time jobs
***********	

	
******************
* Gen detailed part-time categories
******************

gen 	pensum_det_10 = workload_max==100
forvalues i = 0/9 {
		gen 	pensum_det_`i' = 0
		forvalues x = 0/9 {
			replace pensum_det_`i' = 1 if inrange(`i'`x',workload_min,workload_max)		
		}
}


******
* Share of part-/full-time job ads including the share of clicks on part-/full-time jobs
******

* Note that clicks have only been recorded as of june 2020

gen month_abm = mofd(publication_end_date)
format month_abm %tm

local color1 "black"
local color2 "gs10"
preserve
drop if month_abm<tm(2020m6) // We only keep only job ads that were visible on the platform after june 2020, as no clicks were recorded before
distinct ad_hash				// 910'379

keep ad_hash pensum_det_0 pensum_det_1 pensum_det_2 pensum_det_3 pensum_det_4 pensum_det_5 pensum_det_6 pensum_det_7 pensum_det_8 pensum_det_9  pensum_det_10

reshape long pensum_det_, i(ad_hash) j(num_pensum)
collapse pensum_det_, by(num_pensum)
tabstat pensum_det_ , by(num_pensum)

label define num_pensum_lab 0 "0-9" 1 "10-19" 2 "20-29" 3 "30-39" 4 "40-49" 5 "50-59" 6 "60-69" 7 "70-79" 8 "80-89" 9 "90-99" 10 "100" 
label values num_pensum num_pensum_lab

merge 1:1 num_pensum using "Misc_files\share_job_ad_clicks_by_FTE_temp_api_sample.dta" 

drop if num_pensum==0		// we don't show the lowest category since there are almost no observations

replace pensum_det_click = pensum_det_click*100
replace pensum_det_ = pensum_det_*100

graph bar (mean) pensum_det_click pensum_det_  , over(num_pensum, gap(0.2) label(labsize(small)) ) bar(1,color(`color2') lcolor(white)) bar(2,color(`color1') lcolor(white)) blabel(bar, format(%2.0f) size(vsmall))  ylabel(, format(%9.0fc) labsize(medsmall))   b1title(Weekly hours in full-time equivalents FTE, margin(medsmall)) graphregion(color(white)) bgcolor(white)  legend(label(1 "Share of job ad views") label(2 "Share of job ads"))
graph export "$results_part_time\figure_4.eps" , as(eps) replace				
restore


***
* Parttime-share by occupation:
***

tab pensum_cat,m nol

gen fulltime = pensum_cat==0
gen parttime = pensum_cat==2
gen part_or_fulltime = inrange(pensum_cat,1,2)


* Produce table with share of part-time ads among posted jobs and share of clicks on part-time jobs
* To be consistent with the click sample, we focus on ads visible after June 2020

eststo clear
preserve

drop if month_abm<tm(2020m6) // We only keep only job ads that were visible on the platform after june 2020, as no clicks were recorded before
distinct ad_hash				// 910'379

cap drop obs_isco1
bys isco1: gen obs_isco1 = _N

collapse parttime  obs_isco1, by(isco1)

merge 1:1 isco1 using "Misc_files\share_job_ad_clicks_on_parttime_by_isco1_api_sample.dta"	

drop if obs_isco1<50			// We drop occupations with less than 50 observations 
estpost tabstat parttime, by(isco1) notot
estimates store part_noweight
estpost tabstat parttime_clicks, by(isco1) notot
estimates store partclicks_noweight
estpost tabstat obs_isco1, by(isco1)  notot
estimates store obs_noweight

esttab part_noweight partclicks_noweight obs_noweight, cells("mean(fmt(%9.2gc))") label noobs collabels(none) varlabels(`e(labels)')  ///
		mtitle("Part-time ads" "Part-time clicks"  "\# obs") ///
		title(Share of part-time job ads by occupation) 
esttab part_noweight partclicks_noweight obs_noweight using "$results_part_time\table_d7.tex", replace ///
		cells("mean(fmt(%9.2gc))") label noobs collabels(none) varlabels(`e(labels)') frag ///
		mtitle("Part-time ads" "Part-time clicks"  "\# obs") ///
		title(Share of part-time job ads by occupation) 		
restore 



****
* Use regular expressions to extract from jobtitles whether they address men, women, or both 
****

gen rav_oste = kanton 

run "Help_files\extract_gender_jobtitle.do"

* Gen indicator whether jobtitle could be attributed to a category 
gen title_gender = male_title==1 | female_title==1 | neutral_title==1
tab title_gender,m		// 74.4%

tab male_title,m 		// 24.6%
tab female_title,m		// 1.4%
tab neutral_title,m		// 48.4%


*********** 
* Test whether gender preferences, indicated by the grammatical gender of the job title, are associated with the work volumen
***********
	
********************************************
* Regress a dummy for male_title/female_title on fulltime/parttime: 
********************************************

encode company_name, gen(company_name_num)

eststo clear
local cond "  "
foreach x in   male_title female_title  {
		quietly reg `x' i.pensum_cat `cond', cluster(company_name_num)
			quietly sum  `x' 	if e(sample)==1
			local mean_disp	: disp %4.3f r(mean)
			estadd local mean_disp "`mean_disp'"
			estadd local canton "No"						
			estadd local isco "No"
			estadd local firm "No"
		eststo no_cov_`x'
		
		quietly reghdfe `x' i.pensum_cat `cond', absorb(cod_kanton) cluster(company_name_num)
			quietly sum  `x' 	if e(sample)==1
			local mean_disp	: disp %4.3f r(mean)
			estadd local mean_disp "`mean_disp'"
			estadd local canton "Yes"			
			estadd local isco "No"
			estadd local firm "No"
		eststo canton_`x'
		
		quietly reghdfe `x'  i.pensum_cat `cond', absorb(cod_kanton isco4) cluster(company_name_num)
			quietly sum  `x' 	if e(sample)==1
			local mean_disp	: disp %4.3f r(mean)
			estadd local mean_disp "`mean_disp'"
			estadd local canton "Yes"						
			estadd local isco "Yes"
			estadd local firm "No"
		eststo isco_`x'
	
		quietly reghdfe `x'  i.pensum_cat   `cond', absorb(cod_kanton isco4 company_name_num)	cluster(company_name_num)	// within firm and occupation
			quietly sum  `x' 	if e(sample)==1
			local mean_disp	: disp %4.3f r(mean)
			estadd local mean_disp "`mean_disp'"	
			estadd local canton "Yes"						
			estadd local isco "Yes"
			estadd local firm "Yes"
		eststo firm_isco_`x'	
}
	* Men
	esttab no_cov_male_title  canton_male_title isco_male_title  firm_isco_male_title ,  ///
			varwidth(30) cells("b(star fmt(%9.2gc))" "se(par fmt(%9.2gc))")  label collabels(none) nomtitles nodepvar star(* 0.1 ** 0.05 *** 0.01)    ///
			keep(1.pensum_cat 2.pensum_cat  )	///
			coeflabels(1.pensum_cat "Full- or part-time job" 2.pensum_cat "Part-time job" ) ///
			refcat(1.pensum_cat "Full-time job")  ///
			title(Dep. var: preference for men in title) ///
			stats( canton isco firm mean_disp N, fmt(%9.0fc) label("Canton fixed effects" "Occupation fixed effects" "Firm fixed effects" "Mean dependent variable" "Observations" )) ///		
			addnotes("Standard errors in parentheses" "Standard Errors clustered at firm level")	
	esttab no_cov_male_title  canton_male_title isco_male_title  firm_isco_male_title using "$results_part_time\table_d11.tex", replace   ///
			varwidth(30) cells("b(star fmt(%9.2gc))" "se(par fmt(%9.2gc))")  label collabels(none) nomtitles nodepvar star(* 0.1 ** 0.05 *** 0.01)  frag  ///
			keep(1.pensum_cat 2.pensum_cat  )	///
			coeflabels(1.pensum_cat "Full- or part-time job" 2.pensum_cat "Part-time job" ) ///
			refcat(1.pensum_cat "Full-time job")  ///
			stats(canton isco firm mean_disp N, fmt(%9.0fc) label("Canton fixed effects" "Occupation fixed effects" "Firm fixed effects" "Mean dependent variable" "Observations" )) ///		
			addnotes("Standard errors in parentheses" "Standard Errors clustered at firm level")				

	* Women
	esttab no_cov_female_title  canton_female_title isco_female_title  firm_isco_female_title ,  ///
			varwidth(30) cells("b(star fmt(%9.2gc))" "se(par fmt(%9.2gc))")  label collabels(none) nomtitles nodepvar star(* 0.1 ** 0.05 *** 0.01)    ///
			keep(1.pensum_cat 2.pensum_cat  )	///
			coeflabels(1.pensum_cat "Full- or part-time job" 2.pensum_cat "Part-time job" ) ///
			refcat(1.pensum_cat "Full-time job")  ///
			title(Dep. var: preference for women in title) ///
			stats( canton isco firm mean_disp N, fmt(%9.0fc) label("Canton fixed effects" "Occupation fixed effects" "Firm fixed effects" "Mean dependent variable" "Observations" )) ///		
			addnotes("Standard errors in parentheses" "Standard Errors clustered at firm level")	
	esttab no_cov_female_title  canton_female_title isco_female_title  firm_isco_female_title using "$results_part_time\table_d12.tex", replace   ///
			varwidth(30) cells("b(star fmt(%9.2gc))" "se(par fmt(%9.2gc))")  label collabels(none) nomtitles nodepvar star(* 0.1 ** 0.05 *** 0.01)  frag  ///
			keep(1.pensum_cat 2.pensum_cat  )	///
			coeflabels(1.pensum_cat "Full- or part-time job" 2.pensum_cat "Part-time job" ) ///
			refcat(1.pensum_cat "Full-time job")  ///
			stats(canton isco firm mean_disp N, fmt(%9.0fc) label("Canton fixed effects" "Occupation fixed effects" "Firm fixed effects" "Mean dependent variable" "Observations" )) ///		
			addnotes("Standard errors in parentheses" "Standard Errors clustered at firm level")
			
			